set more off
clear all

*********************************************************************************
***************************RECREATION********************************************
*********************************************************************************
import delimited "cu_US_recreation.txt", clear 

drop footnote_codes
drop if period=="M13"

gen price_item = substr(series_id, 9, 10)
replace price_item  = subinstr(price_item, " ", "", .)
gen series_type = substr(series_id, 1, 4)
gen area = substr(series_id, 5, 4)


keep if series_type=="CUUR"
keep if area=="0000"
keep if price_item=="SAR"
bys year: egen N=count(year) /*always 12, which is good*/
keep if N==12

by year: egen price_recreation=mean(value)
by year: gen id=_n
drop if id>1
keep year price_recreation

save TEMP_recreation, replace

*********************************************************************************
***************************ENTERTAINMENT*****************************************
*********************************************************************************

import delimited "mu_US_entertainment.txt", clear 

drop footnote_codes
drop if period=="M13"

gen price_item = substr(series_id, 9, 10)
replace price_item  = subinstr(price_item, " ", "", .)
gen series_type = substr(series_id, 1, 4)
gen area = substr(series_id, 5, 4)


keep if series_type=="MUUR"
keep if area=="0000"
keep if price_item=="SA6"
bys year: egen N=count(year) /*always 12, which is good*/
keep if N==12

by year: egen price_entertainment=mean(value)
by year: gen id=_n
drop if id>1
keep year price_entertainment

save TEMP_entertainment, replace


*********************************************************************************
****************************ALL ITEMS********************************************
*********************************************************************************


import delimited "cu_US_allitem.txt", clear 


drop footnote_codes
drop if period=="M13"

gen price_item = substr(series_id, 9, 10)
replace price_item  = subinstr(price_item, " ", "", .)
gen series_type = substr(series_id, 1, 4)
gen area = substr(series_id, 5, 4)



keep if series_type=="CUUR"
keep if area=="0000"
keep if price_item=="SA0"
bys year: egen N=count(year) /*always 12, which is good*/
keep if N==12

by year: egen price_all=mean(value) /*NOTE: same as on FRED, which is good*/
by year: gen id=_n
drop if id>1
keep year price_all

save CPI_all_long_USA, replace


merge 1:1 year using TEMP_recreation
drop _merge
merge 1:1 year using TEMP_entertainment
drop _merge

drop if (price_recreation==. & price_entertainment==.)


*smooth pasting
gen price_rec=price_entertainment
replace price_rec=price_rec[_n-1]*price_recreation/price_recreation[_n-1] if price_recreation[_n-1]!=.
gen price_rec_real=price_rec/price_all

keep year price_rec_real price_rec price_all
save price_rec_real_USA, replace

erase TEMP_recreation.dta
erase TEMP_entertainment.dta
